rm(list=ls())
# Set the working directory to where folders named after the samples are located.
# The folder contains spliced.mtx, unspliced.mtx, barcodes and gene id files, and json files produced by scKB that documents the sequencing stats.
setwd("/scratch/AG_Ohler/CheWei/scKB")
library(tidyverse)
library(Seurat)
library(RColorBrewer)
library(future)
library(cowplot)
library(ComplexHeatmap)
library(circlize)
library(ggrepel)
#for 200gb ram
options(future.globals.maxSize = 200000 * 1024^2)
rc.integrated <- readRDS("./supp_data/Ground_Tissue_Atlas.rds")
table(rc.integrated$celltype.anno)
# Simple QC label
rc.integrated$celltype.anno <- gsub("Putative Quiescent Center", "Quiescent Center", rc.integrated$celltype.anno, ignore.case = FALSE, perl = FALSE,
fixed = T, useBytes = FALSE)
order <- c("Quiescent Center", "Stem Cell Niche", "Columella", "Lateral Root Cap", "Atrichoblast", "Trichoblast", "Cortex", "Endodermis", "Pericycle", "Phloem", "Xylem", "Procambium", "Unknown")
palette <- c("#BD53FF", "#DED3FE", "#5AB953", "#BFEF45", "#008080", "#21B6A8", "#82B6FF", "#0000FF","#FF9900","#E6194B", "#9A6324", "#FFE119","#EEEEEE")
rc.integrated$celltype.anno <- factor(rc.integrated$celltype.anno, levels = order[sort(match(unique(rc.integrated$celltype.anno),order))])
color <- palette[sort(match(unique(rc.integrated$celltype.anno),order))]
options(repr.plot.width=8, repr.plot.height=6)
DimPlot(rc.integrated, reduction = "umap", group.by = "celltype.anno", cols=color)
options(repr.plot.width=8, repr.plot.height=6)
time_plt <- DimPlot(rc.integrated,
group.by = "time.anno",
order = c("Maturation","Elongation","Meristem"),
cols = c("#DCEDC8", "#42B3D5", "#1A237E"))
time_plt
table(rc.integrated$consensus.time.group)
options(repr.plot.width=8, repr.plot.height=6)
DimPlot(rc.integrated, reduction = "umap", group.by = "consensus.time.group", cols=brewer.pal(10,"Spectral"))
# FindMarkers for celltype and time combination
rc.integrated$cell_group <- paste(rc.integrated$celltype.anno, rc.integrated$consensus.time.group, sep="_")
table(rc.integrated$cell_group)
table(rc.integrated$celltype.anno, rc.integrated$consensus.time.group)
Idents(rc.integrated) <- "cell_group"
Clust_Markers <- FindAllMarkers(rc.integrated,
max.cells.per.ident = 5000,
only.pos=T,
test.use="roc")
feature_names <- read_tsv("./supp_data/features.tsv.gz", col_names = c("gene", "Name", "Type")) %>%
select(-Type) %>%
distinct()
Clust_Markers <- left_join(Clust_Markers, feature_names)
Clust_Markers %>% group_by(cluster) %>% tally()
Clust_Markers %>% group_by(cluster) %>% top_n(1, myAUC)
Clust_Markers <- separate(Clust_Markers, cluster, into=c("celltype", "stage"), sep="_", remove=F)
Clust_Markers <- mutate(Clust_Markers, pct.diff=pct.1-pct.2)
Clust_Markers <- arrange(Clust_Markers, desc(pct.diff)) %>%
group_by(cluster) %>%
mutate(pct.diff_rank=dplyr::row_number()) %>%
arrange(desc(avg_diff)) %>%
mutate(avg_diff_rank=dplyr::row_number()) %>%
arrange(desc(myAUC)) %>%
mutate(myAUC_rank=dplyr::row_number()) %>%
mutate(combined_rank_raw=(pct.diff_rank + avg_diff_rank + myAUC_rank)/3) %>%
arrange(combined_rank_raw) %>%
mutate(combined_rank=dplyr::row_number()) %>%
select(-combined_rank_raw) %>%
arrange(combined_rank)
Clust_Markers
table(Clust_Markers$stage)
Clust_Markers$stage <- factor(Clust_Markers$stage, levels=c("T0", "T1", "T2", "T3", "T4", "T5", "T6", "T7", "T8", "T9"))
rc.integrated$consensus.time.group <- factor(rc.integrated$consensus.time.group, levels=c("T0", "T1", "T2", "T3", "T4", "T5", "T6", "T7", "T8", "T9"))
Idents(rc.integrated) <- "consensus.time.group"
# pseudobulk of each stage
afm <- as.matrix(rc.integrated@assays$integrated@data)
pooled <- matrix(nrow=nrow(afm), ncol = 0)
for (i in unique(rc.integrated@meta.data$consensus.time.group)) {
m <- afm[,which(rc.integrated@meta.data$consensus.time.group==i)]
pooled <- cbind(pooled, rowSums(m)/ncol(m))
}
unique(rc.integrated@meta.data$consensus.time.group)
colnames(pooled) <- unique(rc.integrated@meta.data$consensus.time.group)
pooled_df <- as_tibble(pooled, rownames = "gene")
pooled_df_long <- pivot_longer(pooled_df, cols = 2:ncol(pooled_df), names_to = "stage", values_to = "expression")
long_order <- tibble(stage=paste("T", rep(0:9), sep=""),
order=rep(1:10))
long_order
(pooled_df_long <- left_join(pooled_df_long, long_order) %>%
arrange(order))
(long_corrs <- pooled_df_long %>%
group_by(gene) %>%
summarise(long_cor = cor(order, expression, method = "spearman")))
Clust_Markers <- left_join(Clust_Markers, long_corrs) %>%
mutate(abs_long_cor=abs(long_cor)) %>%
arrange(-abs_long_cor)
Clust_Markers
genes_to_plt_df <- filter(Clust_Markers, celltype %in% c("Endodermis")) %>%
group_by(stage) %>%
top_n(20, -combined_rank) %>%
top_n(10, abs_long_cor) %>%
ungroup() %>%
arrange(stage)
# genes manually added to plotting list - these are known markers we want to make sure are there
to_add <- filter(Clust_Markers, celltype %in% c("Endodermis")) %>% filter(Name=="SCR" | Name=="MYB36" | Name=="CASP4")
(genes_to_plt_df <- bind_rows(genes_to_plt_df, to_add) %>%
ungroup() %>%
group_by(gene) %>%
top_n(1, -combined_rank) %>% # order genes by stage of highest rank
ungroup() %>% arrange(stage))
genes_to_plt <- unique(genes_to_plt_df$gene)
genes_to_plt_endo <- rev(genes_to_plt) # order to pointing upwards
genes_to_plt_df <- filter(Clust_Markers, celltype %in% c("Cortex")) %>%
group_by(stage) %>%
top_n(20, -combined_rank) %>%
top_n(10, abs_long_cor) %>%
ungroup() %>%
arrange(stage)
# to add AT4G09760 AT5G02000
to_add <- filter(Clust_Markers, celltype %in% c("Cortex")) %>% filter(gene %in% c("AT4G09760", "AT5G02000", "AT5G64620"))
(genes_to_plt_df <- bind_rows(genes_to_plt_df, to_add) %>%
ungroup() %>%
group_by(gene) %>%
top_n(1, -combined_rank) %>%
ungroup() %>% arrange(stage))
(genes_to_plt_df <- genes_to_plt_df %>%
ungroup() %>%
group_by(gene) %>%
top_n(1, -combined_rank) %>%
ungroup() %>% arrange(stage))
genes_to_plt <- unique(genes_to_plt_df$gene)
genes_to_plt_cortex <- genes_to_plt
# subset for endodermis
Endo_QC <- subset(rc.integrated, celltype.anno %in% c("Quiescent Center", "Stem Cell Niche", "Endodermis"))
# pseudobulk of each stage of endodermis
afm <- as.matrix(Endo_QC@assays$integrated@data)
pooled_endo <- matrix(nrow=nrow(afm), ncol = 0)
for (i in unique(Endo_QC@meta.data$consensus.time.group)) {
m <- afm[,which(Endo_QC@meta.data$consensus.time.group==i)]
pooled_endo <- cbind(pooled_endo, rowSums(m)/ncol(m))
}
colnames(pooled_endo) <- unique(Endo_QC@meta.data$consensus.time.group)
(endo_sub_m <- pooled_endo[genes_to_plt_endo, c("T0", "T1", "T2", "T3", "T4", "T5", "T6", "T7", "T8", "T9")])
# quantile normalize
mat = endo_sub_m
mat2 = t(apply(mat, 1, function(x) {
q10 = quantile(x, 0.1)
q90 = quantile(x, 0.9)
x[x < q10] = q10
x[x > q90] = q90
scale(x)
}))
colnames(mat2) = colnames(mat)
# side annotation with color to match umap
endo_sa = rowAnnotation(foo = anno_block(gp = gpar(fill = "#0000FF"),
labels = c("Endodermis"),
labels_gp = gpar(col = "white", fontsize = 18)))
# genes to mark on side of heatmap
(endo_mat_for_mark <- data.frame(mat2) %>%
rownames_to_column("gene") %>%
left_join(feature_names) %>%
mutate(index=dplyr::row_number()) %>%
select(Name, index) %>%
filter(Name %in% c("XTH16", "SCR", "MYB36", "CASP1", "CASP2", "CASP3", "CASP4")))
# at - index of genes you want to mark
# labels - Names to show
endo_mark_anno = rowAnnotation(foo = anno_mark(at = endo_mat_for_mark$index, labels = endo_mat_for_mark$Name, labels_gp = gpar(col = "black", fontsize = 16)))
Endo_hm <- Heatmap(mat2,
col = colorRamp2(c(-1.5, 0, 1.5),
c('blue','white','red')),
show_column_names = T,
cluster_columns = F,
cluster_rows=F,
show_row_names = F,
left_annotation=endo_sa,
right_annotation=endo_mark_anno,
heatmap_legend_param = list(title_position="topcenter", title = "Expression", direction="horizontal"))
draw(Endo_hm, padding = unit(c(10, 10, 10, 10), "mm"), heatmap_legend_side = "top")
# cortex subset
Cor_QC <- subset(rc.integrated, celltype.anno %in% c("Quiescent Center", "Stem Cell Niche", "Cortex"))
# pseudobulk of each stage of cortex
afm <- as.matrix(Cor_QC@assays$integrated@data)
pooled_cortex <- matrix(nrow=nrow(afm), ncol = 0)
for (i in unique(Cor_QC@meta.data$consensus.time.group)) {
m <- afm[,which(Cor_QC@meta.data$consensus.time.group==i)]
pooled_cortex <- cbind(pooled_cortex, rowSums(m)/ncol(m))
}
colnames(pooled_cortex) <- unique(Cor_QC@meta.data$consensus.time.group)
(cor_sub_m <- pooled_cortex[genes_to_plt_cortex, c("T0", "T1", "T2", "T3", "T4", "T5", "T6", "T7", "T8", "T9")])
# annotation bar for consensus time
col_fun <- brewer.pal(10,"Spectral")
names(col_fun) <- c("T0", "T1", "T2", "T3", "T4", "T5", "T6", "T7", "T8", "T9")
col_fun
ha = HeatmapAnnotation(`Consensus Time Group` = c("T0", "T1", "T2", "T3", "T4", "T5", "T6", "T7", "T8", "T9"), col = list(`Consensus Time Group` = col_fun), show_legend = F)
# quantile normalize
mat = cor_sub_m
mat2 = t(apply(mat, 1, function(x) {
q10 = quantile(x, 0.1)
q90 = quantile(x, 0.9)
x[x < q10] = q10
x[x > q90] = q90
scale(x)
}))
colnames(mat2) = colnames(mat)
# side annotation to match cortex colors
cortex_sa = rowAnnotation(foo = anno_block(gp = gpar(fill = "#82B6FF"),
labels = c("Cortex"),
labels_gp = gpar(col = "black", fontsize = 18)))
# genes to label on side of heatmap for cortex
(cor_mat_for_mark <- data.frame(mat2) %>%
rownames_to_column("gene") %>%
left_join(feature_names) %>%
mutate(index=dplyr::row_number()) %>%
select(Name, index) %>%
filter(Name %in% c("AT1G17285", "AT5G02000", "C/VIF2", "CYP72A15", "PER56", "PER69", "AT4G09760")))
# at - index of genes you want to mark
# labels - Names to show
cor_mark_anno = rowAnnotation(foo = anno_mark(at = cor_mat_for_mark$index, labels = cor_mat_for_mark$Name, labels_gp = gpar(col = "black", fontsize = 16)))
# ['#d8b365','#f5f5f5','#5ab4ac']
Cortex_hm <- Heatmap(mat2,
col = colorRamp2(c(-1.5, 0, 1.5),
c('blue','white','red')),
show_column_names = F,
cluster_columns = F,
cluster_rows=F,
show_row_names = F,
show_heatmap_legend = F,
left_annotation=cortex_sa,
right_annotation=cor_mark_anno)
Cortex_hm
# text annotation for consensus time
ht = columnAnnotation(foo = anno_text(c("T0", "T1", "T2", "T3", "T4", "T5", "T6", "T7", "T8", "T9"), rot = 0, just = "center", gp = gpar(fontsize = 16)))
options(repr.plot.width=12, repr.plot.height=10)
ht_list = ht %v% ha %v% Endo_hm %v% Cortex_hm
draw(ht_list, ht_gap = unit(0.2, "cm"), padding = unit(c(2, 2, 5, 10), "mm"), heatmap_legend_side = "bottom")
# grab heatmap as a grob to arrange in cowplot
hm_final <- grid.grabExpr(draw(ht_list, ht_gap = unit(0.2, "cm"), padding = unit(c(2, 2, 5, 10), "mm"), heatmap_legend_side = "bottom"))## bottom, left, top and right
# output heatmap as pdf
pdf("./supp_data/WT_Ground_tissue_heatmap.pdf", width = 12, height = 10)
draw(ht_list, ht_gap = unit(0.2, "cm"), padding = unit(c(2, 2, 5, 10), "mm"), heatmap_legend_side = "bottom") ## bottom, left, top and right
dev.off()
DefaultAssay(rc.integrated) <- "integrated"
(SCR <- FeaturePlot(rc.integrated, features = "AT3G54220",
cols = c("grey", "red"), label=F, repel=F, pt.size = 0.3, order = T, min.cutoff = "q1", max.cutoff = "q99") + ggtitle("SCR"))
(MYB36 <- FeaturePlot(rc.integrated, features = "AT5G57620",
cols = c("grey", "red"), label=F, repel=F, pt.size = 0.3, order = F, min.cutoff = "q1", max.cutoff = "q99") + ggtitle("MYB36"))
(CASP <- FeaturePlot(rc.integrated, features = "AT2G36100",
cols = c("grey", "red"), label=F, repel=F, pt.size = 0.3, order = T, min.cutoff = "q1", max.cutoff = "q99") + ggtitle("CASP1"))
options(repr.plot.width=8, repr.plot.height=6)
(Cor_1 <- FeaturePlot(rc.integrated, features = "AT5G02000",
cols = c("grey", "red"), label=F, repel=F, pt.size = 0.3, order = T, min.cutoff = "q1", max.cutoff = "q99") + ggtitle("AT5G02000"))
options(repr.plot.width=8, repr.plot.height=6)
(Cor_2 <- FeaturePlot(rc.integrated, features = "AT5G64620",
cols = c("grey", "red"), label=F, repel=F, pt.size = 0.3, order = T, min.cutoff = "q1", max.cutoff = "q99") + ggtitle("C/VIF2"))
options(repr.plot.width=8, repr.plot.height=6)
(Cor_3 <- FeaturePlot(rc.integrated, features = "AT4G09760",
cols = c("grey", "red"), label=F, repel=F, pt.size = 0.3, order = T, min.cutoff = "q1", max.cutoff = "q99") + ggtitle("AT4G09760"))
plot_anno <- function(rc.integrated){
order <- c("Quiescent Center", "Stem Cell Niche", "Columella", "Lateral Root Cap", "Atrichoblast", "Trichoblast", "Cortex", "Endodermis", "Pericycle", "Phloem", "Xylem", "Procambium", "Unknown")
palette <- c("#9400D3","#DCD0FF", "#5AB953", "#BFEF45", "#008080", "#21B6A8", "#82B6FF", "#0000FF","#FF9900","#E6194B", "#9A6324", "#FFE119","#EEEEEE")
rc.integrated$celltype.anno <- factor(rc.integrated$celltype.anno, levels = order[sort(match(unique(rc.integrated$celltype.anno),order))])
color <- palette[sort(match(unique(rc.integrated$celltype.anno),order))]
p1 <- DimPlot(rc.integrated, reduction = "umap", group.by = "celltype.anno", cols=color) + ggtitle("Cell Type") + theme(plot.title = element_text(hjust = 0.5))
p2 <- DimPlot(rc.integrated, reduction = "umap", group.by = "time.anno", order = c("Maturation","Elongation","Meristem"),cols = c("#DCEDC8", "#42B3D5", "#1A237E")) + ggtitle("Developmental Stage") + theme(plot.title = element_text(hjust = 0.5))
p3 <- DimPlot(rc.integrated, reduction = "umap", group.by = "consensus.time.group", cols=brewer.pal(10,"Spectral")) + ggtitle("Consensus Time") + theme(plot.title = element_text(hjust = 0.5))
options(repr.plot.width=25, repr.plot.height=8)
gl <- lapply(list(p1, p2, p3), ggplotGrob)
gwidth <- do.call(unit.pmax, lapply(gl, "[[", "widths"))
gl <- lapply(gl, "[[<-", "widths", value = gwidth)
gridExtra::grid.arrange(grobs=gl, ncol=3)
}
options(repr.plot.width=28, repr.plot.height=6)
top_umaps <- plot_anno(rc.integrated)
options(repr.plot.width=24, repr.plot.height=18)
gene_umaps <- plot_grid(SCR, MYB36, CASP, Cor_1, Cor_2, Cor_3, ncol=3, align="hv")
gene_hm <- plot_grid(gene_umaps, hm_final, ncol=2, rel_widths = c(1.6, 1))
options(repr.plot.width=28, repr.plot.height=18)
plot_grid(top_umaps, gene_hm, ncol=1, rel_heights = c(1, 1.5))
ggsave("./supp_data/Endo_cortex_traj_combined.pdf",
width = 28,
height = 18)
sessionInfo()